【期末報告】台大管理學院的迷思

組員: 工管四 吳蔚平 / 工管四 林哲彣 / 會計四 郭書瑋 / 會計四 禹翔仁

library(png)
library(grid)
# Read the cover picture from the working directory and draw it on the grid
# graphics device with centre justification.
img <- readPNG("cover.png")
## Warning in readPNG("cover.png"): libpng warning: iCCP: known incorrect sRGB
## profile
grid.raster(image = img, just = "center")

迷思一、台大管理學院的學生都不讀研究所?? 薪水都比理工科的學生低??

#update.packages("dplyr")
#library(dplyr)
#library(ggplot2)
# install.packages("ggplot")
# library(ggplot)
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(data.table)
require(stats)

# Read the master's-student table and draw each college's share as a pie chart.
# NOTE(review): the path is an absolute Windows location, and the CSV rows are
# assumed to follow the same college order as the labels below - confirm.
pathMaster <- "D:/R/HW4/master.csv"
master_data <- read.table(file = pathMaster, sep = ",", header = TRUE)

college_names <- c(
  "文學院", "理學院", "社會科學院", "醫學院", "工學院", "生物資源暨農學院",
  "管理學院", "公共衛生學院", "電機資訊學院", "法律學院", "生命科學院"
)
df <- data.frame(group = college_names, value = master_data$碩士總數)

# A pie chart in ggplot2 is a single stacked bar drawn in polar coordinates.
bp <- ggplot(data = df, mapping = aes(x = "", y = value, fill = group)) +
  geom_bar(stat = "identity", width = 1)

pie <- bp + coord_polar(theta = "y", start = 0)
pie

上圖為 2014 年台大各學院碩士生占比圓餅圖。由圖中可知,管理學院的碩士占比位居第三。但無法推翻假說,因為此圓餅圖為 各學院碩士生 / 整體碩士生 求得,管理學院可能大多由非管院大學部學生就讀。

目前管理學院的現況是大部分的同學不會繼續攻讀研究所。綜合現況與上圖可以推知就讀管院研究所大多為非原科系學生,與理工學院就讀研究所狀況大不相同。那麼是何項原因造成這項結果呢?我們推論薪資因素為主要操控因子。

# Master's graduates, College of Management: share per salary bracket.
df <- data.frame(
  group = c("60,001元以上", "40,001元至60,000元", "31,001元至40,000元", "31,000元以下"),
  value = c(0.4718, 0.4769, 0.0462, 0.0051)
)

# Stacked bar wrapped into polar coordinates, shaded with the "Greens" palette.
bp <- ggplot(data = df, mapping = aes(x = "", y = value, fill = group)) +
  geom_bar(stat = "identity", width = 1)

pie <- bp + coord_polar(theta = "y", start = 0)
pie + scale_fill_brewer(palette = "Greens")

# Bachelor's graduates, College of Management: share per salary bracket.
# NOTE(review): the labels "30001至40000" and "31,000元以下" overlap; one of
# them is presumably mistyped - confirm against the survey source.
df2 <- data.frame(
  group = c("50001以上", "40001至50000", "30001至40000", "31,000元以下"),
  value = c(0.3011, 0.3068, 0.3466, 0.0455)
)

bp <- ggplot(data = df2, mapping = aes(x = "", y = value, fill = group)) +
  geom_bar(stat = "identity", width = 1)

pie <- bp + coord_polar(theta = "y", start = 0)
pie + scale_fill_brewer(palette = "Greens")

【台大管理學院碩士 / 學士就業薪資比較 】

由上兩張圖(2014 年數據)可知,攻讀管理學院研究所的薪資相較大學部也是大幅上升。但由於不能確定研究所學生是來自於台大管院學生,因此迷思無法破解。只能說台大管院研究所與大學部畢業對於薪資是有顯著影響的。

接著比較理工科學生大學部與研究所畢業的薪資差距。

# Master's graduates, science and engineering: share per salary bracket.
# The values add up to about 3 rather than 1; the pie only shows their
# relative sizes.
df <- data.frame(
  group = c(
    "60,001元以上", "40,001元至60,000元", "31,001元至40,000元",
    "31,000元以下", "其他(不願提供薪資或拒答)"
  ),
  value = c(0.8839, 1.6345, 0.365, 0.0974, 0.0194)
)

# Stacked bar wrapped into polar coordinates, shaded with the "Blues" palette.
bp <- ggplot(data = df, mapping = aes(x = "", y = value, fill = group)) +
  geom_bar(stat = "identity", width = 1)

pie <- bp + coord_polar(theta = "y", start = 0)
pie + scale_fill_brewer(palette = "Blues")

# Bachelor's graduates, science and engineering: share per salary bracket.
df2 <- data.frame(
  group = c(
    "50001以上", "40001至50000", "30001至40000",
    "31,000元以下", "其他(不願提供薪資或拒答)"
  ),
  value = c(0.8451, 0.754, 0.8325, 0.556, 0.0124)
)

bp <- ggplot(data = df2, mapping = aes(x = "", y = value, fill = group)) +
  geom_bar(stat = "identity", width = 1)

pie <- bp + coord_polar(theta = "y", start = 0)
pie + scale_fill_brewer(palette = "Blues")

【台大理工科碩士 / 學士就業薪資比較 】 上圖為台大理工科系(理學院、工學院、電資學院加總)研究所與大學部的就業薪資圓餅圖。 → 薪資差距相當巨大,可以證明為何台大理工科學生幾乎會選擇繼續攻讀研究所。

迷思二、管院的交換生比其他學院高??

# Load the exchange-student table and build `answer01`: one row per year with
# the incoming head count of every college plus `rate_in`, the College of
# Management's share of all incoming exchange students.
rawdata <- read.csv("D:/R/HW3/exchange.csv", header = TRUE, stringsAsFactors = FALSE)

# Incoming students: rows 2-22 of the raw table.
# NOTE(review): library(dplyr) is commented out in this file, so `slice()` is
# presumably resolved through plotly's re-export of the dplyr verb - confirm.
answer01 <- slice(rawdata, 2:22)

count_cols <- c(
  "Total_In", "文學院In", "理學院In", "社會科學院In", "醫學院In", "工學院In",
  "農學院In", "管理學院In", "公衛學院In", "電資學院In", "法律學院In",
  "生命科學院In"
)
colnames(answer01) <- c("Year", count_cols)
rownames(answer01) <- seq_len(nrow(answer01))

# Replace missing cells with 0, then convert every count column to numeric
# once. Cells that still fail to parse become NA at this point and their rows
# are removed by na.omit() below.
answer01[is.na(answer01)] <- 0
answer01[count_cols] <- lapply(answer01[count_cols], as.numeric)

# Management share of the yearly total, computed for all rows at once and with
# the exact column name (no reliance on `$` partial matching of "Total").
# A year whose total is 0 yields NaN, which na.omit() also drops.
answer01$rate_in <- answer01$管理學院In / answer01$Total_In

# Drop rows that contain NA / NaN.
answer01 <- na.omit(answer01)

# Reshape `answer01` (one row per year, one column per college) into the long
# table `newTable` with columns year / people / department for plotting.
lastyearId <- length(answer01$Year)
firstYear <- as.numeric(answer01$Year[1])
lastYear <- as.numeric(answer01$Year[lastyearId])
n <- lastYear - firstYear + 1
allType <- names(answer01)
rownames(answer01) <- seq_len(nrow(answer01))

# Columns 3-13 hold the per-college counts.
typeId <- 3:13

# Rows 2..n as in the original loop, but never past the last existing row:
# n is derived from the year span, which can exceed the row count once
# na.omit() has removed years.
# NOTE(review): starting at row 2 leaves the first year out of the chart -
# confirm this is intended.
row_ids <- seq_len(max(min(n, nrow(answer01)), 0))[-1]

# One small data frame per year, bound once at the end. `people` stays numeric
# (cbind() on matrices used to coerce it to text, which made the y-axis
# categorical).
year_rows <- lapply(row_ids, function(nid) {
  data.frame(
    year = answer01$Year[nid],
    people = as.numeric(unlist(answer01[nid, typeId], use.names = FALSE)),
    department = as.character(allType[typeId]),
    stringsAsFactors = FALSE
  )
})

if (length(year_rows) > 0) {
  newTable <- do.call(rbind, year_rows)
} else {
  # Nothing to plot: keep the expected column layout.
  newTable <- data.frame(
    year = answer01$Year[0],
    people = numeric(0),
    department = character(0),
    stringsAsFactors = FALSE
  )
}
names(newTable) <- c("year", "people", "department")

#install.packages("colorRamps")
library(colorRamps)
#install.packages("RColorBrewer")
library(RColorBrewer)
# Line chart of incoming exchange students per year, one line per college.
# The 9-colour "Set1" palette is interpolated so that every department present
# in `newTable` gets its own colour (previously the count came from the
# unrelated `mtcars` dataset and the palette size was hard-coded to 11).
colourCount <- length(unique(newTable$department))
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))

# add_lines() takes no axis range: the trace attribute `yaxis` is an axis id,
# so the former list(range = ...) value was not a valid setting and is dropped.
# NOTE(review): if a fixed y range is wanted, set it with
# layout(yaxis = list(range = c(min, max))) - confirm the intended limits.
p <- plot_ly(
  data = newTable, x = ~year, y = ~people, color = ~department,
  colors = getPalette(colourCount)
) %>%
  add_lines()
p

將台大所有系所的交換學生依據年度進行比較,看出管理學院交換學生的數量的確大幅高於其他學院,且近幾年更是大幅成長。

# Select the 2013 data (rows 20-21 of the cleaned table) and draw the colleges'
# incoming exchange-student counts as a base-graphics pie chart.
# NOTE(review): if both rows exist, `aa` holds 22 values but only 11 names are
# assigned - confirm that a single (2013) row is what should be plotted.
piein <- slice(answer01, 20:21)

college_cols <- c(
  "文學院In", "理學院In", "社會科學院In", "醫學院In", "工學院In", "農學院In",
  "管理學院In", "公衛學院In", "電資學院In", "法律學院In", "生命科學院In"
)
# Concatenate the selected columns one after another into a plain vector.
aa <- unlist(piein[college_cols], use.names = FALSE)
names(aa) <- c(
  "文學院", "理學院", "社會科學院", "醫學院", "工學院", "農學院",
  "管理學院", "公衛學院", "電資學院", "法律學院", "生命科學院"
)
pie(aa, col = rainbow(24))

以上圓餅圖則是呈現 2013 年台大交換學生的分布比例,也可以進一步證明近幾年管理學院的交換生人數位居台大各大院所最高。

迷思四、管院的妹子真的多嗎?

大家都說,管院有著滿~山~滿~谷的妹子 然而,管院的男女比真的是在台大各院裡面名列前茅的嗎?
我們分析了2004-2016總共13年的各院以及管院五系的男女比例,
男女比計算方式為「女性人數 / 男性人數」

以下是我們的圖表:

# Gender-ratio data and the five line charts shown in the following sections.
# The charts read the columns `year`, `gender` and `dept` of the CSV.
g.data <- read.csv("D:/R/Final/gender2.csv")
#head(g.data)

library(plotly)

# Rows of g.data whose `dept` is one of `depts`. Unused factor levels are
# dropped so that a colour mapping on `dept` only sees the units actually
# selected (the subsets used to keep every level of the full file, which is
# why the palette-size warning appeared even on charts with few lines).
pick_depts <- function(depts) {
  droplevels(g.data[g.data$dept %in% depts, , drop = FALSE])
}

# One line per `dept`, year on the x-axis and gender ratio on the y-axis.
# The former add_lines(yaxis = list(range = ...)) argument is removed: the
# trace attribute `yaxis` is an axis id, not a place for axis ranges.
gender_lines <- function(rows) {
  plot_ly(data = rows, x = ~year, y = ~gender, color = ~dept) %>%
    add_lines()
}

# All colleges and departments.
g.chart <- gender_lines(g.data)

# College of Management and the four colleges compared with it.
g2.data <- pick_depts(c("管理學院", "公衛學院", "社會科學院", "法律學院", "文學院"))
g2.chart <- gender_lines(g2.data)

# Same as above plus the "管院扣資管" series.
g3.data <- pick_depts(c("管理學院", "管院扣資管", "公衛學院", "社會科學院", "法律學院", "文學院"))
g3.chart <- gender_lines(g3.data)

# College of Management against the "管院扣資管" series only.
g4.data <- pick_depts(c("管理學院", "管院扣資管"))
g4.chart <- gender_lines(g4.data)

# College of Management and its five departments.
g5.data <- pick_depts(c("管理學院", "會計系", "國企系", "工管系", "財金系", "資管系"))
g5.chart <- gender_lines(g5.data)

表一 : 台大各院及管院五系之性別比總表

# Display the line chart built from the full gender-ratio table (g.data).
g.chart
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

再把高於管院性別比的院給挑出來,得出下圖

表二 : 性別比前五名

# Display the chart limited to the five colleges selected in g2.data.
g2.chart
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

由上表可以知道,性別比的排名基本上是
1.文學院(第一名獨走)
2.公衛學院
3.社科學院
4.法律學院(與社科院不相上下)
5.管理學院(在2009年後即低於法律學院)

由此可知,管理學院在台大 11 個學院當中排名第五,屬於中段班的成員。關於台大管院妹子比例是台大名列前茅的流言 — 破解!!

咦? 等等,你們說管院可能是資管系把性別比拉低的? 那我們就繼續來看看是不是真的吧!
我們將管院扣掉資管系的同學並再計算性別比之後,得出下表

表三 : 台大各院及管院五系(含扣掉資管)之性別比總表

# Display the chart for g3.data: the five colleges plus the "管院扣資管" series.
g3.chart
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

看起來似乎不相上下哦?
我們再將管院及扣掉資管之後的管院拉出來比較,見下表:

表四 : 台大管院性別比 vs 台大管院扣掉資管系後之性別比

# Display the two-series chart for g4.data ("管理學院" vs "管院扣資管").
g4.chart
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

燈愣,我們發現一個驚人的事實!
在2012前,少了資管系的管院性別比反而是高於整個管院非常多的
資管系其實不是拉低管院性別比的元兇!
大家真的誤會他們啦~

因此,我們又有了一個疑問,既然不是資管系讓我們在2009年之後掉出「性別比的前段班」
那究竟是什麼系出了問題呢?
於是我們再拉出一張管院及管院五系的折線圖來看看:

表五 : 管院五系性別比

# Display the chart for g5.data: "管理學院" and its five departments.
g5.chart
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

從上表可以發現,財金系在近幾年的性別比呈現節節敗退的狀況
於是我們決定使用相關係數來判斷財金系是否才是影響管院性別比的元兇

究竟誰是元兇?

# Gender-ratio column of one unit, kept as a one-column data frame.
gender_of <- function(unit) {
  g.data[which(g.data$dept == unit), "gender", drop = FALSE]
}

fin.gender <- gender_of("財金系")
im.gender <- gender_of("資管系")
com.gender <- gender_of("管理學院")

# NOTE(review): cor() pairs the rows by position, so the three subsets are
# assumed to list the same years in the same order - confirm.

# Correlation between the Finance department's and the College of
# Management's gender ratios.
cor(x = com.gender, y = fin.gender)
##           gender
## gender 0.7130391
# Correlation between the Information Management department's and the College
# of Management's gender ratios.
cor(x = com.gender, y = im.gender)
##           gender
## gender 0.2366894

我們發現了財金系是這十幾年來左右管院性別比高低的主因!
綜上所述,本節的研究得出以下結論:
1.管院的性別比在台大各院不是前段班
2.資管系不完全是降低管院性別比的主因
3.財金系是這幾年左右管院性別比的重要因素!

#install.packages("devtools")
library(devtools)
## Warning: package 'devtools' was built under R version 3.3.3
#install.packages("ggplot2")
#install.packages("ggmap")
library(ggplot2)
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.3.3
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(mapproj)
## Warning: package 'mapproj' was built under R version 3.3.3
## Loading required package: maps
## Warning: package 'maps' was built under R version 3.3.3
# Campus locations, each stored as c(latitude, longitude).
ntu_management <- c(25.013993, 121.538106) # College of Management
ntu_socialscience <- c(25.020538, 121.542366) # Social Sciences
ntu_law <- c(25.020503, 121.543597) # Law
ntu_EE <- c(25.019035, 121.538761) # Electrical Engineering
ntu_g <- c(25.018907, 121.540226) # College of Engineering
ntu_info <- c(25.019424, 121.541551) # Computer Science
ntu_word <- c(25.017924, 121.536747) # Liberal Arts

# One value per location, in the same order as the coordinates above.
# NOTE(review): presumably each unit's gender ratio - confirm the source.
data <- c(0.904, 1.148, 1.131, 0.148, 0.260, 0.125, 2.003)

# Stack the locations into a 7 x 2 matrix, then split it into one-column
# latitude / longitude matrices.
coords <- do.call(
  rbind,
  list(ntu_management, ntu_socialscience, ntu_law, ntu_EE, ntu_g, ntu_info, ntu_word)
)
lon <- coords[, 2, drop = FALSE]
lat <- coords[, 1, drop = FALSE]

ntuData <- data.frame(lon, lat, data)
ntuData
##        lon      lat  data
## 1 121.5381 25.01399 0.904
## 2 121.5424 25.02054 1.148
## 3 121.5436 25.02050 1.131
## 4 121.5388 25.01903 0.148
## 5 121.5402 25.01891 0.260
## 6 121.5416 25.01942 0.125
## 7 121.5367 25.01792 2.003
# Fetch the base map around the campus; this needs network access.
map <- get_map(location = "national taiwan university", zoom = 16, language = "zh-TW")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=national+taiwan+university&zoom=16&size=640x640&scale=2&maptype=terrain&language=zh-TW&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=national%20taiwan%20university&sensor=false
# Plain base map first.
ggmap(map)

# Same map with one red point per location, sized by `data`.
ggmap(map) +
  geom_point(
    data = ntuData,
    mapping = aes(x = lon, y = lat, size = data),
    color = "red"
  )